n <- 50 # number of trajectories per noise level
noises <- seq(0.2, 3, 0.2)
freq <- 1 # compute sinus every 1 time unit
# Phase shifted
ps <- multi_sims(type = "ps", noises = noises, n = n, freq = freq)
ps
## Time variable value noise
## 1: 0 V1 0.0000000 0
## 2: 1 V1 0.8414710 0
## 3: 2 V1 0.9092974 0
## 4: 3 V1 0.1411200 0
## 5: 4 V1 -0.7568025 0
## ---
## 79996: 95 V50 -0.6622294 3
## 79997: 96 V50 0.2727111 3
## 79998: 97 V50 0.9569223 3
## 79999: 98 V50 0.7613435 3
## 80000: 99 V50 -0.1342110 3
# Phase shifted with trend
pst <- multi_sims(type = "pst", noises = noises, n = n, slope = 0.1, freq = freq)
# Amplitude noise
na <- multi_sims(type = "na", noises = noises, n = n, freq = freq)
plot_sim(ps)
plot_sim(pst)
plot_sim(na)
For each condition, this computes the distance between the average trajectory and each individual trajectory.
cond <- "noise"
ti <- "Time"
mea <- "value"
lab <- "variable"
ps_mean <- dist_mean(data = ps, condition = cond, tcol = ti, measure = mea, label = lab, return.mean = F)
ps_mean
## noise variable euclid_to_mean
## 1: 0 V1 0.000000
## 2: 0 V2 0.000000
## 3: 0 V3 0.000000
## 4: 0 V4 0.000000
## 5: 0 V5 0.000000
## ---
## 796: 3 V46 6.957651
## 797: 3 V47 6.799990
## 798: 3 V48 6.338835
## 799: 3 V49 6.452373
## 800: 3 V50 7.543223
pst_mean <- dist_mean(data = pst, condition = cond, tcol = ti, measure = mea, label = lab, return.mean = F)
na_mean <- dist_mean(data = na, condition = cond, tcol = ti, measure = mea, label = lab, return.mean = F)
For each condition, take each pair of trajectories and compute the correlations between them (Pearson, Spearman and Kendall). In addition it clips the trajectories by comparison with their rolling means: whenever the trajectory is above its rolling mean, set it to 1 otherwise to 0. Finally for each pair of trajectories, the “overlap” metric, represents how often the trajectories of the pair have same value (i.e. 0.5 if complete random, 1 if they fully overlap).
This takes quite a while (partially) because it is implemented with for loops, but is still performed in reasonable time (i.e. a few minutes)
ps_pw <- all_pairwise_stats(data = ps, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
ps_pw
## noise Label1 Label2 Overlap Pearson Spearman Kendall
## 1: 0 V1 V2 1.00 1.00000000 1.000000000 1.0000000000
## 2: 0 V1 V3 1.00 1.00000000 1.000000000 1.0000000000
## 3: 0 V1 V4 1.00 1.00000000 1.000000000 1.0000000000
## 4: 0 V1 V5 1.00 1.00000000 1.000000000 1.0000000000
## 5: 0 V1 V6 1.00 1.00000000 1.000000000 1.0000000000
## ---
## 19596: 3 V47 V49 0.40 -0.23733070 -0.202256226 -0.1377777778
## 19597: 3 V47 V50 0.08 -0.97603113 -0.973021302 -0.8626262626
## 19598: 3 V48 V49 0.89 0.94887913 0.951407141 0.8161616162
## 19599: 3 V48 V50 0.41 -0.29638004 -0.275751575 -0.1834343434
## 19600: 3 V49 V50 0.52 0.02022916 -0.002508251 0.0004040404
pst_pw <- all_pairwise_stats(data = pst, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
na_pw <- all_pairwise_stats(data = na, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
cor(ps_pw[, 4:7])
## Overlap Pearson Spearman Kendall
## Overlap 1.0000000 0.9888383 0.9905414 0.9983633
## Pearson 0.9888383 1.0000000 0.9997731 0.9903708
## Spearman 0.9905414 0.9997731 1.0000000 0.9921283
## Kendall 0.9983633 0.9903708 0.9921283 1.0000000
cor(pst_pw[, 4:7])
## Overlap Pearson Spearman Kendall
## Overlap 1.0000000 0.9894297 0.9897065 0.9857815
## Pearson 0.9894297 1.0000000 0.9998155 0.9733336
## Spearman 0.9897065 0.9998155 1.0000000 0.9745133
## Kendall 0.9857815 0.9733336 0.9745133 1.0000000
cor(na_pw[, 4:7])
## Overlap Pearson Spearman Kendall
## Overlap 1.0000000 0.9524214 0.9535094 0.9674259
## Pearson 0.9524214 1.0000000 0.9961731 0.9808621
## Spearman 0.9535094 0.9961731 1.0000000 0.9808846
## Kendall 0.9674259 0.9808621 0.9808846 1.0000000
For each condition, count the proportion of trajectories that are lying more than x standard deviation from the mean (conversion to z-score first?).
Get a p-value: Assume that they are normally distributed, check that the distributions lies above 0 for correlations and above 0.5 for clipping (t-test).
ps_pw[noise !=0 , .(p.val.pears = t.test(Pearson)$p.value,
p.val.spear = t.test(Spearman)$p.value,
p.val.kenda = t.test(Kendall)$p.value,
p.val.ovrlp = t.test(Overlap, mu = 0.5)$p.value), by = noise]
## noise p.val.pears p.val.spear p.val.kenda p.val.ovrlp
## 1: 0.2 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 2: 0.4 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 3: 0.6 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 4: 0.8 2.474567e-138 1.049048e-136 1.034254e-134 1.055455e-130
## 5: 1.0 2.493862e-140 6.910645e-139 7.692519e-135 1.416701e-128
## 6: 1.2 5.749557e-34 2.167627e-34 7.169215e-34 3.514574e-33
## 7: 1.4 2.019084e-02 2.420775e-02 2.747831e-02 2.148950e-02
## 8: 1.6 8.869206e-01 9.078501e-01 8.690393e-01 9.146619e-01
## 9: 1.8 3.019246e-05 2.762564e-05 3.251085e-05 3.816894e-05
## 10: 2.0 9.188862e-01 9.059285e-01 7.920694e-01 7.309774e-01
## 11: 2.2 1.987143e-01 1.945878e-01 1.664291e-01 1.796073e-01
## 12: 2.4 6.747538e-01 6.901232e-01 6.969219e-01 7.264563e-01
## 13: 2.6 4.175480e-01 4.123208e-01 4.313395e-01 4.549496e-01
## 14: 2.8 3.083033e-01 3.167636e-01 3.653296e-01 3.565258e-01
## 15: 3.0 6.988920e-01 7.066568e-01 6.593373e-01 6.280499e-01
pst_pw[noise !=0 , .(p.val.pears = t.test(Pearson)$p.value,
p.val.spear = t.test(Spearman)$p.value,
p.val.kenda = t.test(Kendall)$p.value,
p.val.ovrlp = t.test(Overlap, mu = 0.5)$p.value), by = noise]
## noise p.val.pears p.val.spear p.val.kenda p.val.ovrlp
## 1: 0.2 0 0 0 0.000000e+00
## 2: 0.4 0 0 0 0.000000e+00
## 3: 0.6 0 0 0 0.000000e+00
## 4: 0.8 0 0 0 1.351011e-250
## 5: 1.0 0 0 0 3.070666e-66
## 6: 1.2 0 0 0 1.327521e-47
## 7: 1.4 0 0 0 5.823719e-05
## 8: 1.6 0 0 0 1.035992e-04
## 9: 1.8 0 0 0 1.546523e-01
## 10: 2.0 0 0 0 6.632032e-01
## 11: 2.2 0 0 0 9.954057e-09
## 12: 2.4 0 0 0 2.961142e-01
## 13: 2.6 0 0 0 6.034313e-01
## 14: 2.8 0 0 0 4.142044e-01
## 15: 3.0 0 0 0 2.985493e-01
na_pw[noise !=0 , .(p.val.pears = t.test(Pearson)$p.value,
p.val.spear = t.test(Spearman)$p.value,
p.val.kenda = t.test(Kendall)$p.value,
p.val.ovrlp = t.test(Overlap, mu = 0.5)$p.value), by = noise]
## noise p.val.pears p.val.spear p.val.kenda p.val.ovrlp
## 1: 0.2 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 2: 0.4 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 3: 0.6 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 4: 0.8 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 5: 1.0 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 6: 1.2 0.000000e+00 0.000000e+00 0.000000e+00 8.474046e-294
## 7: 1.4 0.000000e+00 0.000000e+00 0.000000e+00 4.348222e-187
## 8: 1.6 0.000000e+00 0.000000e+00 0.000000e+00 4.190186e-131
## 9: 1.8 1.587742e-261 1.777216e-257 4.372247e-255 3.720415e-77
## 10: 2.0 3.411216e-250 7.318420e-232 7.017019e-231 4.345561e-68
## 11: 2.2 6.071381e-144 6.366061e-137 3.982633e-136 3.223184e-33
## 12: 2.4 4.035454e-116 2.781478e-104 8.909083e-103 8.435194e-24
## 13: 2.6 2.904902e-112 7.045786e-102 3.235532e-101 8.840405e-16
## 14: 2.8 2.698114e-73 6.407591e-72 5.305944e-72 3.122165e-18
## 15: 3.0 2.820193e-85 3.114461e-79 8.704064e-79 9.873313e-20
If we increase sampling rate, it means that there is more points per oscillation. Visually this means that the average trajectory is smoother.
plot_sim(sim_phase_shifted(n = 50, noise = 0.4, freq = 2), use.facet = F)
plot_sim(sim_phase_shifted(n = 50, noise = 0.4, freq = 0.2), use.facet = F)
The role of the rolling mean in clipping, is to “cut the oscillation in 2”. A good case is like:
# Contains 6 to 7 points per oscillation
sim <- sim_phase_shifted(n = 1, noise = 0, freq = 1)
plot(sim$Time, sim$value, type = "b")
lines(x=sim$Time, y=rollex(sim$value, k = 6), col = "red", lwd = 1.5)
# clip trajectory and scale for clarity on the plot
clip_traj <- wrap_clip(x = sim$value, k = 6)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x=sim$Time, y=clip_traj, col = "blue", type = "S")
It is a good case since the rolling mean (in red), cuts the trajectory (in black) at half-period (i.e. near to 0). This results in a clipped trajectory (in blue) that is up when the oscillation is “above 0” and down when its “below 0”.
Now we multiply the sampling rate by 5, but keep the same window size for the moving average in the top case, and multiply it also by 5 in the bottom case:
par(mfrow = c(2,1))
# Contains more than 35 points per oscillation
sim <- sim_phase_shifted(n = 1, noise = 0, freq = 0.2)
size_window <- 6
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")
size_window <- 35
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")
As we can see the moving average follows the signal perfectly with the small window, whereas it is out of phase with the larger window with much smaller amplitude. Clipping trajectory is ok in both cases. Nevertheless the top case is bad, why? Don’t forget that we’ve been working with perfect sinusoid so far, let’s see what happens with a tiny bit of noise in the amplitude of the signal:
par(mfrow = c(2,1))
# Contains more than 35 points per oscillation
sim <- sim_noisy_amplitude(n = 1, noise = 0.1, freq = 0.2)
size_window <- 6
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")
size_window <- 35
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")
As we observe, clipped trajectory becomes completely random with the small window, whereas it remains identical with the larger one. So we should choose the window size for the rolling mean such that it covers the number of points in an oscillation.
On multiple simulations the distributions look like:
# Already defined in section 4.
# na <- multi_sims(type = "na", noises = noises, n = n, freq = 1)
# na_pw <- all_pairwise_stats(data = na, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
# q3 <- ggplot(na_pw, aes(x = noise, y = Overlap)) + geom_boxplot(aes(group = noise)) + ggtitle("Noisy Amplitude") + scale_y_continuous(limits = c(0,1))
na_high_freq <- multi_sims(type = "na", noises = noises, n = n, freq = 0.2)
na_high_freq_pw <- all_pairwise_stats(data = na_high_freq, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
q3_high <- ggplot(na_high_freq_pw, aes(x = noise, y = Overlap)) + geom_boxplot(aes(group = noise)) + ggtitle("Window Size: 5; Freq: 0.2 (window too small)") + scale_y_continuous(limits = c(0,1))
grid.arrange(q3 + ggtitle("Window Size: 5; Freq: 1 (window of the right size)"), q3_high, ncol = 2)